
## @knitr CLUSTER_AID_SETUP

rm(list=ls())


# from \url{https://www.r-bloggers.com/identifying-the-os-from-r/}
get_os <- function(){
  sysinf <- Sys.info()
  if (!is.null(sysinf)){
    os <- sysinf['sysname']
    if (os == 'Darwin')
      os <- "osx"
  } else { ## mystery machine
    os <- .Platform$OS.type
    if (grepl("^darwin", R.version$os))
      os <- "osx"
    if (grepl("linux-gnu", R.version$os))
      os <- "linux"
  }
  tolower(os)
}

get_machine  <- function(){
    sysinf  <- Sys.info()
    if (!is.null(sysinf)){
        my.machine  <- sysinf['nodename']
    }
    tolower(my.machine)
}


if(get_os()=="windows" & get_machine()=="mompracen") {
    path <- setwd('C:/Users/giaco/Documents/MEGAsync/')
} else if (get_os()=="windows" & get_machine()!="mompracen") {
    path <- setwd('C:/Users/grieco/Dropbox/')
} else if (get_machine()=="aramis") {
    path <- setwd('/home/giacomo68/Dropbox/')
} else {
    path <- setwd('/home/giacomo/Dropbox/')
}


library(survival)
library(xtable)
library(ggplot2)
library(english)
library(Publish)
library(showtext)
library(extrafont)
library(mice)
library(mitools)
library(sandwich)
library(lmtest)


Data <- read.csv('gc-jg-project/kampala/data/data-kampala.csv',
                 stringsAsFactors=FALSE)


Data$country_id <- as.numeric(factor(Data$country_name))

m <- 10

imp <- mice(Data[,c("kampala",
                 "sidea.1991",
                 "sideb.1991",
                 "my.mil.cap.lag",
                 "v2x_polyarchy.lag",
                 "v2x_libdem.lag",     
                 "v2x_partipdem.lag",
                 "v2x_delibdem.lag", 
                 "v2x_egaldem.lag",
                 "gdp.cap.lag",
                 "pop.lag",
                 "common.law",
                 "year",
                 "rule.law.lag",
                 "aid.recipient",  ## this is the lag variable, see 19-variables.R
                 "aid.recipient.q", ## this is the lag variable, see 19-variables.R
                 "All_NGO.lag",
                 "country_id")], m = m, maxit = 10, seed = 3210)

models <- list()
model_results <- list()
merged_imputations <- list()
m1.model_results <- list()
m2.model_results <- list()
m3.model_results <- list()
m4.model_results <- list()
m5.model_results <- list()
m6.model_results <- list()

for (i in 1:m) {
    complete_data <- complete(imp, i)

    complete_data$.imp <- i
    
    merged_data <- merge(complete_data, 
                         Data[,c("country_id","year","country_name",
                                 "kampala.t0","kampala.t1",
                                 "sidea.1945",
                                 "sideb.1945",
                                 "sidea.force.1945",
                                 "sideb.force.1945",
                                 "sidea.no.force.1945",
                                 "sideb.no.force.1945",
                                 "sidea.force.1991",
                                 "sideb.force.1991",
                                 "sidea.no.force.1991",
                                 "sideb.no.force.1991"
                                 )], 
                        by = c("country_id", "year"),  # Note: quoted variable names
                        all.x = TRUE)  # Keep all rows from complete_data
    
    merged_imputations[[i]] <- merged_data
        

    current_data <- merged_imputations[[i]]

    m1 <- coxph(Surv(kampala.t0,kampala.t1,kampala)~
              sidea.1991
            + sideb.1991
            + my.mil.cap.lag
            + v2x_polyarchy.lag
            + log(gdp.cap.lag)
            + log(pop.lag)
            + common.law
            + aid.recipient
            + cluster(country_id)
           ,data=current_data)

     m1.vcov_cluster <- vcovCL(m1, cluster = current_data$country_id)

     m1.model_results[[i]] <- list(
        coefficients = coef(m1),
        variance = m1.vcov_cluster,
        n = nrow(current_data)
       )
        
    m2 <- coxph(Surv(kampala.t0,kampala.t1,kampala)~
                sidea.1945
            + sideb.1945
            + my.mil.cap.lag
            + v2x_polyarchy.lag
            + log(gdp.cap.lag)
            + log(pop.lag)
            + common.law
            + aid.recipient
            + cluster(country_name)
           ,data=current_data)

    m2.vcov_cluster <- vcovCL(m2, cluster = current_data$country_id)

    m2.model_results[[i]] <- list(
        coefficients = coef(m2),
        variance = m2.vcov_cluster,
        n = nrow(current_data)
       )
    

    m3 <- coxph(Surv(kampala.t0,kampala.t1,kampala)~
                    sidea.force.1991
                + sideb.force.1991
                + my.mil.cap.lag
                + v2x_polyarchy.lag
                + log(gdp.cap.lag)
                + log(pop.lag)
                + common.law
                + aid.recipient
                + cluster(country_name)
               ,data=current_data)

    m3.vcov_cluster <- vcovCL(m3, cluster = current_data$country_id)

    m3.model_results[[i]] <- list(
        coefficients = coef(m3),
        variance = m3.vcov_cluster,
        n = nrow(current_data)
       )
    

    m4 <- coxph(Surv(kampala.t0,kampala.t1,kampala)~
                    sidea.force.1945
            + sideb.force.1945
            + my.mil.cap.lag
            + v2x_polyarchy.lag
            + log(gdp.cap.lag)
            + log(pop.lag)
            + common.law
            + aid.recipient
            + cluster(country_name)
           ,data=current_data)

    m4.vcov_cluster <- vcovCL(m4, cluster = current_data$country_id)

    m4.model_results[[i]] <- list(
        coefficients = coef(m4),
        variance = m4.vcov_cluster,
        n = nrow(current_data)
       )
    

    m5 <- coxph(Surv(kampala.t0,kampala.t1,kampala)~
                    sidea.no.force.1991
                + sideb.no.force.1991
                + my.mil.cap.lag
                + v2x_polyarchy.lag
                + log(gdp.cap.lag)
                + log(pop.lag)
                + common.law
                + aid.recipient
                + cluster(country_name)
               ,data=current_data)
    
    m5.vcov_cluster <- vcovCL(m5, cluster = current_data$country_id)

     m5.model_results[[i]] <- list(
        coefficients = coef(m5),
        variance = m5.vcov_cluster,
        n = nrow(current_data)
       )
    

    m6 <- coxph(Surv(kampala.t0,kampala.t1,kampala)~
                    sidea.no.force.1945
                + sideb.no.force.1945
                + my.mil.cap.lag
                + v2x_polyarchy.lag
                + log(gdp.cap.lag)
                + log(pop.lag)
                + common.law
                + aid.recipient
                + cluster(country_name)
               ,data=current_data)
    
    m6.vcov_cluster <- vcovCL(m6, cluster = current_data$country_id)

    m6.model_results[[i]] <- list(
        coefficients = coef(m6),
        variance = m6.vcov_cluster,
        n = nrow(current_data)
       )
}


m1.pooled_results <- MIcombine(
    results = lapply(m1.model_results, function(x) x$coefficients),
    variances = lapply(m1.model_results, function(x) x$variance)
)

m2.pooled_results <- MIcombine(
    results = lapply(m2.model_results, function(x) x$coefficients),
    variances = lapply(m2.model_results, function(x) x$variance)
)

m3.pooled_results <- MIcombine(
    results = lapply(m3.model_results, function(x) x$coefficients),
    variances = lapply(m3.model_results, function(x) x$variance)
)

m4.pooled_results <- MIcombine(
    results = lapply(m4.model_results, function(x) x$coefficients),
    variances = lapply(m4.model_results, function(x) x$variance)
)

m5.pooled_results <- MIcombine(
    results = lapply(m5.model_results, function(x) x$coefficients),
    variances = lapply(m5.model_results, function(x) x$variance)
)

m6.pooled_results <- MIcombine(
    results = lapply(m6.model_results, function(x) x$coefficients),
    variances = lapply(m6.model_results, function(x) x$variance)
)



# View final results
summary(m1.pooled_results)
summary(m2.pooled_results)
summary(m3.pooled_results)
summary(m4.pooled_results)
summary(m5.pooled_results)
summary(m6.pooled_results)


coef.m1 <- m1.pooled_results$coefficients
se.m1 <- sqrt(diag(m1.pooled_results$variance))
z.m1 <- coef.m1/se.m1
p.m1 <- 2*pnorm(-abs(z.m1))
conf.m1 <- confint(m1.pooled_results)
m.m1 <- cbind(coef.m1,se.m1,p.m1,conf.m1)

coef.m2 <- m2.pooled_results$coefficients
se.m2 <- sqrt(diag(m2.pooled_results$variance))
z.m2 <- coef.m2/se.m2
p.m2 <- 2*pnorm(-abs(z.m2))
conf.m2 <- confint(m2.pooled_results)
m.m2 <- cbind(coef.m2,se.m2,p.m2,conf.m2)

coef.m3 <- m3.pooled_results$coefficients
se.m3 <- sqrt(diag(m3.pooled_results$variance))
z.m3 <- coef.m3/se.m3
p.m3 <- 2*pnorm(-abs(z.m3))
conf.m3 <- confint(m3.pooled_results)
m.m3 <- cbind(coef.m3,se.m3,p.m3,conf.m3)

coef.m4 <- m4.pooled_results$coefficients
se.m4 <- sqrt(diag(m4.pooled_results$variance))
z.m4 <- coef.m4/se.m4
p.m4 <- 2*pnorm(-abs(z.m4))
conf.m4 <- confint(m4.pooled_results)
m.m4 <- cbind(coef.m4,se.m4,p.m4,conf.m4)

coef.m5 <- m5.pooled_results$coefficients
se.m5 <- sqrt(diag(m5.pooled_results$variance))
z.m5 <- coef.m5/se.m5
p.m5 <- 2*pnorm(-abs(z.m5))
conf.m5 <- confint(m5.pooled_results)
m.m5 <- cbind(coef.m5,se.m5,p.m5,conf.m5)

coef.m6 <- m6.pooled_results$coefficients
se.m6 <- sqrt(diag(m6.pooled_results$variance))
z.m6 <- coef.m6/se.m6
p.m6 <- 2*pnorm(-abs(z.m6))
conf.m6 <- confint(m6.pooled_results)
m.m6 <- cbind(coef.m6,se.m6,p.m6,conf.m6)

                 

rownames(m.m1) <- rownames(m.m2) <- rownames(m.m3) <-
    rownames(m.m4) <- rownames(m.m5) <- rownames(m.m6) <- 
    c("MID side A",
      "MID side B",
      "Milex per cap",
      "Polyarchy",
      "GDP cap (log)",
      "Population (log)",
      "Common law",
      "Aid recipient")

colnames(m.m1) <- colnames(m.m2) <- colnames(m.m3) <-
    colnames(m.m4) <- colnames(m.m5) <- colnames(m.m6) <-
    c("b","std.err","p","X2.5.","X97.5.")

t.m1 <- data.frame(round(m.m1,3))
t.m1$Variables <- rownames(m.m1)
rownames(t.m1) <- NULL
t.m1$type <- "Overall,\nfrom 1991"

t.m2 <- data.frame(round(m.m2,3))
t.m2$Variables <- rownames(m.m2)
rownames(t.m2) <- NULL
t.m2$type <- "Overall,\nfrom 1945"

t.m3 <- data.frame(round(m.m3,3))
t.m3$Variables <- rownames(m.m3)
rownames(t.m3) <- NULL
t.m3$type <- "Force,\nfrom 1991"

t.m4 <- data.frame(round(m.m4,3))
t.m4$Variables <- rownames(m.m4)
rownames(t.m4) <- NULL
t.m4$type <- "Force,\nfrom 1945"

t.m5 <- data.frame(round(m.m5,3))
t.m5$Variables <- rownames(m.m5)
rownames(t.m5) <- NULL
t.m5$type <- "No force,\nfrom 1991"

t.m6 <- data.frame(round(m.m6,3))
t.m6$Variables <- rownames(m.m6)
rownames(t.m6) <- NULL
t.m6$type <- "No force,\nfrom 1945"

t.combined <- rbind(t.m1,t.m2,t.m3,t.m4,t.m5,t.m6)

## "Population (log)",
## "MID side B",      
## "Common law",      
## "GDP cap (log)",   
## "Polyarchy",       
## "MID side A"

t.combined$Variables.factor <- factor(t.combined$Variable,
                                      levels=c("Aid recipient",
                                               "Common law",
                                               "Population (log)",
                                               "GDP cap (log)",
                                               "Polyarchy",
                                               "Milex per cap",
                                               "MID side B",
                                               "MID side A"
                                               ),
                                      ordered=TRUE)

t.combined$type.factor <- factor(t.combined$type,
                                 levels=c("No force,\nfrom 1945",
                                          "Force,\nfrom 1945",
                                          "Overall,\nfrom 1945",
                                          "No force,\nfrom 1991",
                                          "Force,\nfrom 1991",
                                          "Overall,\nfrom 1991"),
                                 ordered=TRUE)

## http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/#a-colorblind-friendly-palette
cbbPalette <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")

## knitr FIGURE_WITH_AID
figure.a8 <- ggplot(t.combined, aes(color=type.factor)) +
    geom_hline(yintercept=0, color=gray(1/2), lty=2) +
#    geom_linerange(aes(x=Variables, ymin=X2.5., ymax=X97.5.),
#                       lwd=1, position=position_dodge(width = 1/2)) +
    geom_pointrange(aes(x=Variables.factor, y=b, ymin=X2.5., ymax=X97.5.),
                    lwd=2, position=position_dodge(width = 3/5),
                    shape=21, fill="WHITE") +
#    scale_color_brewer(palette="Set1") +
    scale_color_manual(values=cbbPalette) +
    scale_y_continuous(limits=c(-5,2),breaks=seq(-5,2,1)) + #name="SATT",
    coord_flip() +
    theme_bw() +
#    ggtitle("") +
    theme(axis.text.x = element_text(size = 25),#angle=45),
          axis.text.y = element_text(size = 25),
          axis.title.x = element_blank(),
#        axis.title.x = element_text("b", size=16),
          axis.title.y = element_blank(),
#          axis.title.y = element_text(size=16),
#          legend.title=element_blank(),
          legend.text=element_text(size=18),
          legend.key.size=unit(.5, "in"),
          legend.key.width=unit(1, "in"),
          legend.title=element_text(size = 18, face="bold"),
          legend.position="bottom",
          plot.title = element_text(lineheight=.8, face="bold",size=40)) +
    labs(color = "MID track record\n") +
    guides(color=guide_legend(nrow=2,byrow=TRUE,reverse=TRUE))

ggsave("gc-jg-project/kampala/paper/figures/app-figure-a8.png", plot = figure.a8, width = 14, height = 8, device = "png")


t.n.obs <- m1$n
t.n.fail <- m1$nevent
t.n.fail.tot <- table(Data$kampala)[2]
t.miss.vals <- dim(Data)[1]-t.n.obs
